# In [1]:  (Jupyter notebook cell marker)
# If done right, this program should plot a chosen weather variable against a
# chosen air-quality variable and show their Pearson correlation on the plot.
# Shoutout to my bois over at StackOverflow - you da real MVPs, couldn't've done it without you
import pandas as pd
import numpy as np
from bokeh.plotting import figure
from bokeh.io import show
from bokeh.models import HoverTool, Label
import scipy.stats
# Read one line of text from the user without evaluating it.  raw_input only
# exists on Python 2; on Python 3 the plain input() already behaves that way.
try:
    read_line = raw_input
except NameError:
    read_line = input

# --- Weather data --------------------------------------------------------------------
# Surrounding quotes are stripped so a file name typed the Python-2 way
# ('data.csv') still resolves to the same path.
weatherfile = read_line("Which weather file would you like to use? ").strip().strip("\"'")
df = pd.read_csv(weatherfile)
# Columns are picked by position.  `.values` replaces DataFrame.as_matrix, which
# newer pandas releases no longer provide; ravel() gives flat 1-D arrays.
temp = df.iloc[:, 3:4].values.ravel()
humidity = df.iloc[:, 4:5].values.ravel()
# Only column 5 is taken: flattening an open-ended 5: slice would interleave any
# extra columns and break the row alignment with the time column.
pressure = df.iloc[:, 5:6].values.ravel()
unix_timeweather = df.iloc[:, 2:3].values.ravel()
# Look the requested series up by name instead of eval()-ing the typed text, so
# arbitrary input is never executed.
weather_series = {"temp": temp, "humidity": humidity, "pressure": pressure}
w_choice = read_line("Which data set do you want? temp, humidity, or pressure? ").strip()
if w_choice not in weather_series:
    raise ValueError("Unknown weather data set: %r" % (w_choice,))
w_used = weather_series[w_choice]
######################################################################################
# --- Air-quality data ----------------------------------------------------------------
aqfile = read_line("Which air quality file would you like to use? ").strip().strip("\"'")
df2 = pd.read_csv(aqfile)
# Same positional layout as the weather file: one flat array per column.
PM25 = df2.iloc[:, 4:5].values.ravel()
PM1 = df2.iloc[:, 3:4].values.ravel()
PM10 = df2.iloc[:, 5:6].values.ravel()
unix_timeaq = df2.iloc[:, 2:3].values.ravel()
aq_series = {"PM1": PM1, "PM25": PM25, "PM10": PM10}
aq_choice = read_line("Which data set do you want? PM1, PM25, or PM10? ").strip()
if aq_choice not in aq_series:
    raise ValueError("Unknown air quality data set: %r" % (aq_choice,))
aq_used = aq_series[aq_choice]
######################################################################################
def find_nearest(array, value, tolerance=30):
    """Return the index of the entry in *array* closest to *value*.

    Args:
        array: Sequence or NumPy array of numbers.  Multi-dimensional input
            (e.g. an (n, 1) column) is flattened, so the returned index
            refers to the flattened order.
        value: The number to look for (a scalar or a one-element array).
        tolerance: Largest absolute difference that still counts as a match.
            Defaults to 30, the limit that used to be hard-coded.

    Returns:
        The integer index of the closest entry, or None when *array* is
        empty or the closest entry differs from *value* by more than
        *tolerance*.
    """
    flat = np.asarray(array).ravel()
    if flat.size == 0:
        # argmin raises on an empty array; "no match" is the sensible answer.
        return None
    distances = np.abs(flat - value)
    idx = distances.argmin()
    if distances[idx] <= tolerance:
        return int(idx)
    return None
#######################################################################################
def make_usable(array1, array):
    """Remove, in place, every position where either list holds NaN.

    Both lists are treated as paired data: when the value at some index is
    NaN in *array1* or in *array*, that index is deleted from both, so the
    remaining entries stay aligned.

    Args:
        array1: First list of numbers (mutated in place).
        array: Second list of numbers (mutated in place); it is indexed with
            positions taken from *array1*, so it must be at least as long.

    Returns:
        None.  The lists are modified directly.
    """
    # Walk from the last index down to 0 (inclusive) so that deleting an entry
    # never shifts the positions that still have to be checked.  The previous
    # loop stopped before index 0 and left a leading NaN pair in place.
    for i in range(len(array1) - 1, -1, -1):
        if np.isnan(array[i]) or np.isnan(array1[i]):
            del array[i]
            del array1[i]
#######################################################################################
# Pair every air-quality reading with the weather reading closest in time.
# The inputs are flattened first: they may arrive as (n, 1) columns, and calling
# float() on a one-element array row is deprecated in recent NumPy releases.
aq_values = np.asarray(aq_used, dtype=float).ravel()
aq_times = np.asarray(unix_timeaq).ravel()

weatherarr = []
aqarr = []
for aq_value, aq_time in zip(aq_values, aq_times):
    aqarr.append(float(aq_value))
    nearest_time = find_nearest(unix_timeweather, aq_time)
    if nearest_time is None:
        # No weather sample within find_nearest's tolerance: store a NaN
        # placeholder so both lists keep the same length and alignment.
        weatherarr.append(np.nan)
    else:
        weatherarr.append(float(w_used[nearest_time]))
# Plot the arrays #####################################################################
# Filter the paired lists in place before any statistics or plotting.
make_usable(weatherarr, aqarr)

# Text shown in the on-plot label: the stringified result of scipy's pearsonr.
pearson_text = "Pearson r and p: " + str(scipy.stats.pearsonr(weatherarr, aqarr))

hoverp = HoverTool(tooltips=[("(x,y)", "($x, $y)")])
p = figure(tools=[hoverp])

# Appearance of the label box, kept apart from its position and text.
label_style = dict(
    render_mode='css',
    border_line_color='black',
    border_line_alpha=1.0,
    background_fill_color='white',
    background_fill_alpha=1.0,
)
# Position is given in screen units, i.e. relative to the plot canvas rather
# than to the data ranges.
correlation = Label(
    x=50,
    y=50,
    x_units='screen',
    y_units='screen',
    text=pearson_text,
    **label_style
)
p.add_layout(correlation)

# Scatter: weather value on x, air-quality value on y.
p.circle(x=weatherarr, y=aqarr, color="firebrick")
show(p)